draft code

John Nobles, Adam Gullion

2025-04-15

Variables of interest (hitting):

a

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

b

c

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

d

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

e

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

f

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

g

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Variables of interest (pitching):

## Rows: 1,106
## Columns: 81
## $ last_name..first_name  <chr> "Colon, Bartolo", "Burnett, A.J.", "Hudson, Tim…
## $ player_id              <int> 112526, 150359, 218596, 279824, 282332, 285064,…
## $ year                   <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015,…
## $ player_age             <int> 42, 38, 39, 36, 34, 37, 40, 36, 36, 34, 35, 37,…
## $ p_game                 <int> 33, 26, 24, 32, 29, 33, 33, 37, 33, 26, 33, 29,…
## $ p_formatted_ip         <dbl> 194.2, 164.0, 123.2, 198.2, 167.1, 135.0, 214.1…
## $ pa                     <int> 815, 699, 525, 827, 726, 598, 884, 665, 896, 63…
## $ ab                     <int> 771, 633, 476, 768, 659, 528, 798, 607, 824, 55…
## $ hit                    <int> 217, 174, 134, 214, 188, 140, 195, 180, 211, 13…
## $ single                 <int> 149, 134, 103, 140, 134, 87, 125, 115, 156, 82,…
## $ double                 <int> 41, 24, 16, 46, 22, 29, 37, 34, 32, 35, 44, 31,…
## $ triple                 <int> 2, 5, 2, 6, 4, 7, 8, 2, 2, 3, 4, 11, 3, 0, 3, 2…
## $ home_run               <int> 25, 11, 13, 22, 28, 17, 25, 29, 21, 17, 26, 26,…
## $ strikeout              <int> 136, 143, 64, 91, 137, 108, 126, 108, 175, 134,…
## $ walk                   <int> 24, 49, 37, 33, 50, 58, 61, 43, 53, 65, 42, 51,…
## $ k_percent              <dbl> 16.7, 20.5, 12.2, 11.0, 18.9, 18.1, 14.3, 16.2,…
## $ bb_percent             <dbl> 2.9, 7.0, 7.0, 4.0, 6.9, 9.7, 6.9, 6.5, 5.9, 10…
## $ batting_avg            <dbl> 0.281, 0.275, 0.282, 0.279, 0.285, 0.265, 0.244…
## $ slg_percent            <dbl> 0.437, 0.381, 0.405, 0.440, 0.458, 0.443, 0.405…
## $ on_base_percent        <dbl> 0.304, 0.336, 0.340, 0.311, 0.338, 0.338, 0.303…
## $ on_base_plus_slg       <dbl> 0.741, 0.717, 0.745, 0.751, 0.796, 0.781, 0.708…
## $ babip                  <dbl> 0.311, 0.337, 0.301, 0.289, 0.320, 0.301, 0.258…
## $ p_era                  <dbl> 4.16, 3.18, 4.44, 3.81, 4.73, 4.67, 3.91, 5.85,…
## $ p_opp_batting_avg      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ xba                    <dbl> 0.268, 0.261, 0.290, 0.285, 0.253, 0.270, 0.254…
## $ xslg                   <dbl> 0.427, 0.406, 0.428, 0.456, 0.378, 0.453, 0.398…
## $ woba                   <dbl> 0.316, 0.315, 0.327, 0.321, 0.343, 0.337, 0.307…
## $ xwoba                  <dbl> 0.314, 0.323, 0.346, 0.335, 0.304, 0.350, 0.314…
## $ xobp                   <dbl> 0.294, 0.326, 0.354, 0.319, 0.311, 0.346, 0.314…
## $ xiso                   <dbl> 0.159, 0.145, 0.138, 0.171, 0.125, 0.183, 0.144…
## $ xwobacon               <dbl> 0.368, 0.375, 0.366, 0.361, 0.345, 0.393, 0.333…
## $ exit_velocity_avg      <dbl> 88.9, 89.8, 90.3, 87.0, 87.8, 88.7, 87.4, 88.2,…
## $ launch_angle_avg       <dbl> 11.8, 5.6, 4.5, 11.7, 10.7, 12.3, 12.5, 13.4, 1…
## $ sweet_spot_percent     <dbl> 34.1, 30.4, 26.6, 33.3, 29.1, 33.8, 32.7, 36.7,…
## $ barrel_batted_rate     <dbl> 5.4, 4.8, 4.8, 4.9, 4.1, 8.2, 4.4, 7.5, 5.6, 4.…
## $ hard_hit_percent       <dbl> 34.7, 38.7, 34.1, 33.0, 31.1, 36.1, 30.5, 32.0,…
## $ avg_best_speed         <dbl> 79.31984, 79.73075, 80.68015, 76.00830, 77.8399…
## $ avg_hyper_speed        <dbl> 93.30319, 94.17833, 94.14691, 93.07544, 92.9267…
## $ whiff_percent          <dbl> 14.7, 22.3, 19.9, 11.9, 21.6, 16.9, 19.5, 22.4,…
## $ swing_percent          <dbl> 48.6, 45.1, 47.2, 48.1, 46.2, 43.0, 49.4, 46.1,…
## $ groundballs_percent    <dbl> 44.1, 55.0, 57.1, 47.7, 48.2, 47.1, 43.1, 41.2,…
## $ flyballs_percent       <dbl> 23.2, 14.1, 13.7, 20.7, 20.5, 18.2, 22.0, 21.0,…
## $ linedrives_percent     <dbl> 27.0, 26.8, 25.4, 24.7, 25.3, 28.2, 26.2, 31.0,…
## $ popups_percent         <dbl> 5.7, 4.0, 3.8, 6.9, 6.0, 6.5, 8.6, 6.9, 7.5, 4.…
## $ n_ff_formatted         <dbl> 29.1, 11.7, 7.0, 26.4, 25.2, 41.2, 11.3, 0.9, 3…
## $ ff_avg_speed           <dbl> 90.9, 91.7, 88.5, 84.5, 90.8, 91.6, 82.3, 89.8,…
## $ ff_avg_spin            <int> 2255, 2082, 2126, 2076, 2114, 2071, 1810, 2294,…
## $ ff_avg_break_x         <dbl> -6.9, -6.2, -6.8, 2.0, 6.8, -7.9, -13.4, -3.5, …
## $ ff_avg_break_z         <dbl> -18.2, -21.0, -23.5, -26.8, -19.0, -18.2, -31.3…
## $ ff_avg_break_z_induced <dbl> 15.5, 12.0, 11.8, 12.1, 14.7, 14.9, 9.8, 18.4, …
## $ ff_avg_break           <dbl> 17.2, 13.8, 13.9, 12.7, 16.4, 17.2, 17.2, 18.8,…
## $ n_sl_formatted         <dbl> 9.7, NA, NA, NA, 22.5, 15.5, NA, 27.2, NA, 1.0,…
## $ sl_avg_speed           <dbl> 82.8, NA, NA, NA, 79.6, 88.5, NA, 84.7, NA, 86.…
## $ sl_avg_spin            <int> 2178, NA, NA, NA, 1823, 1986, NA, 2115, NA, 171…
## $ sl_avg_break_x         <dbl> 2.9, NA, NA, NA, -11.2, 0.4, NA, 6.4, NA, 1.2, …
## $ sl_avg_break_z         <dbl> -37.3, NA, NA, NA, -44.2, -22.4, NA, -33.5, NA,…
## $ sl_avg_break_z_induced <dbl> 3.1, NA, NA, NA, -0.1, 12.8, NA, 5.0, NA, 4.7, …
## $ sl_avg_break           <dbl> 6.3, NA, NA, NA, 11.8, 13.0, NA, 8.9, NA, 6.2, …
## $ sl_range_speed         <dbl> 1.8, NA, NA, NA, 1.5, 1.3, NA, 1.6, NA, 2.4, 1.…
## $ n_ch_formatted         <dbl> 7.4, 8.8, NA, 21.1, 14.0, 8.7, NA, 21.0, 2.7, 1…
## $ ch_avg_speed           <dbl> 82.6, 86.3, NA, 78.7, 83.9, 84.0, NA, 79.8, 84.…
## $ ch_avg_spin            <int> 1727, 1678, NA, 1625, 1950, 1759, NA, 1519, 195…
## $ ch_avg_break_x         <dbl> -13.2, -10.8, NA, 14.1, 11.1, -13.7, NA, -12.6,…
## $ ch_avg_break_z         <dbl> -32.0, -33.4, NA, -37.9, -30.1, -30.1, NA, -35.…
## $ ch_avg_break_z_induced <dbl> 9.0, 3.7, NA, 7.1, 9.4, 9.3, NA, 8.4, 7.2, 3.0,…
## $ ch_avg_break           <dbl> 16.4, 12.0, NA, 16.2, 15.0, 17.0, NA, 15.6, 17.…
## $ ch_range_speed         <dbl> 2.4, 1.8, NA, 1.7, 1.2, 1.5, NA, 1.1, 2.3, 1.3,…
## $ n_cu_formatted         <dbl> 0.3, 29.4, 11.6, 8.6, NA, 20.6, NA, 6.8, 9.6, 6…
## $ cu_avg_speed           <dbl> 81.5, 82.3, 75.8, 71.7, NA, 77.0, NA, 75.3, 78.…
## $ cu_avg_spin            <int> 1758, 2023, 2260, 2137, NA, 1876, NA, 2248, 208…
## $ cu_avg_break_x         <dbl> -6.9, 8.7, 14.6, -8.2, NA, 4.3, NA, 9.0, 7.6, -…
## $ cu_avg_break_z         <dbl> -34.0, -50.0, -56.0, -61.9, NA, -54.3, NA, -62.…
## $ cu_avg_break_z_induced <dbl> 8.0, -9.0, -7.6, -7.7, NA, -7.6, NA, -13.7, -7.…
## $ cu_avg_break           <dbl> 13.2, 12.8, 16.8, 11.9, NA, 9.1, NA, 16.6, 11.1…
## $ cu_range_speed         <dbl> 1.5, 1.5, 1.6, 2.0, NA, 1.5, NA, 1.3, 2.5, 2.8,…
## $ n_st_formatted         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ st_avg_speed           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ st_avg_spin            <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ st_avg_break_x         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ st_avg_break_z         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ st_avg_break_z_induced <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

h. Fastball spin correlation with k_percent

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

i. Four-seam fastball (ff) quality vs. OPS

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

j. Four-seam fastball (ff) vs. exit velocity and launch angle

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

k. Four-seam fastball (ff) vs. whiff %

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

l. Year vs. innings pitched (ip)

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

## # A tibble: 10 × 2
##     year avg_p_formatted_ip
##    <int>              <dbl>
##  1  2015              172. 
##  2  2016              167. 
##  3  2017              162. 
##  4  2018              161. 
##  5  2019              165. 
##  6  2020               59.0
##  7  2021              155. 
##  8  2022              159. 
##  9  2023              159. 
## 10  2024              160.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    41.2   132.2   158.0   153.1   180.2   232.2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2015    2017    2019    2019    2022    2024
## # A tibble: 9 × 2
##    year avg_p_formatted_ip
##   <int>              <dbl>
## 1  2015               172.
## 2  2016               167.
## 3  2017               162.
## 4  2018               161.
## 5  2019               165.
## 6  2021               155.
## 7  2022               159.
## 8  2023               159.
## 9  2024               160.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

m. Four-seam fastball (ff) vs. sweet-spot %

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 29 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 29 rows containing missing values or values outside the scale range
## (`geom_point()`).

n. Sticky stuff: four-seam fastball (ff) average spin by year

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

##   ff_avg_spin
## 1    2246.848
## # A tibble: 10 × 2
##     year ff_avg_spin
##    <int>       <dbl>
##  1  2015       2209.
##  2  2016       2234.
##  3  2017       2236 
##  4  2018       2235.
##  5  2019       2260.
##  6  2020       2278.
##  7  2021       2245.
##  8  2022       2246.
##  9  2023       2271.
## 10  2024       2266.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'